import pandas as pd
filepath = "C:\\Users\\abdelilah\\Desktop\\Python Projects\\London Housing\\"
housing_df = pd.read_csv(filepath + "London Housing Data.csv", delimiter = ",")
housing_df
date | area | average_price | code | houses_sold | no_of_crimes | |
---|---|---|---|---|---|---|
0 | 1/1/1995 | city of london | 91449 | E09000001 | 17.0 | NaN |
1 | 2/1/1995 | city of london | 82203 | E09000001 | 7.0 | NaN |
2 | 3/1/1995 | city of london | 79121 | E09000001 | 14.0 | NaN |
3 | 4/1/1995 | city of london | 77101 | E09000001 | 7.0 | NaN |
4 | 5/1/1995 | city of london | 84409 | E09000001 | 10.0 | NaN |
... | ... | ... | ... | ... | ... | ... |
13544 | 9/1/2019 | england | 249942 | E92000001 | 64605.0 | NaN |
13545 | 10/1/2019 | england | 249376 | E92000001 | 68677.0 | NaN |
13546 | 11/1/2019 | england | 248515 | E92000001 | 67814.0 | NaN |
13547 | 12/1/2019 | england | 250410 | E92000001 | NaN | NaN |
13548 | 1/1/2020 | england | 247355 | E92000001 | NaN | NaN |
13549 rows × 6 columns
housing_df.count()
date 13549 area 13549 average_price 13549 code 13549 houses_sold 13455 no_of_crimes 7439 dtype: int64
First Task: Convert the Datatype of 'Date' column to Date-Time format.
housing_df["date"] = pd.to_datetime(housing_df.date)
housing_df.dtypes
date datetime64[ns] area object average_price int64 code object houses_sold float64 no_of_crimes float64 dtype: object
Second Task:
B.1 Add a new column ''year'' in the dataframe, which contains years only.
housing_df.insert(1, "year", "")
housing_df.head(2)
date | year | area | average_price | code | houses_sold | no_of_crimes | |
---|---|---|---|---|---|---|---|
0 | 1995-01-01 | city of london | 91449 | E09000001 | 17.0 | NaN | |
1 | 1995-02-01 | city of london | 82203 | E09000001 | 7.0 | NaN |
housing_df["year"] = housing_df.date.dt.year
B.2 Add a new column ''month'' as 2nd column in the dataframe, which contains month only.
housing_df.insert(1, "month", "")
housing_df["month"] = housing_df.date.dt.month
Third Task: Remove the columns 'year' and 'month' from the dataframe.
housing_df = housing_df.drop(["month", "year"], axis = 1)
Fouth Task: Show all the records where 'No. of Crimes' is 0. And, how many such records are there ?
no_crimes = housing_df[housing_df["no_of_crimes"] == 0]
zero_crimes = no_crimes.shape[0]
Fifth Task: What is the maximum & minimum 'average_price' per year in england ?
housing_df.insert(1, "year", "")
housing_df["year"] = housing_df.date.dt.year
eng_housing = housing_df[housing_df["area"] == "england"]
years = housing_df["year"].unique()
price_range = pd.DataFrame(columns = ["Year", "Min Price", "Max Price"])
price_range["Year"] = years
price_range = price_range.set_index("Year")
for y in years:
that_year = eng_housing[eng_housing["year"] == y]
price_range.loc[y,"Min Price"] = that_year["average_price"].min()
price_range.loc[y,"Max Price"] = that_year["average_price"].max()
price_range
Min Price | Max Price | |
---|---|---|
Year | ||
1995 | 52788 | 53901 |
1996 | 52333 | 55755 |
1997 | 55789 | 61564 |
1998 | 61659 | 65743 |
1999 | 65522 | 75071 |
2000 | 75219 | 84191 |
2001 | 84245 | 95992 |
2002 | 96215 | 119982 |
2003 | 121610 | 138985 |
2004 | 139719 | 160330 |
2005 | 158572 | 167244 |
2006 | 166544 | 182031 |
2007 | 181824 | 194764 |
2008 | 165795 | 191750 |
2009 | 159340 | 174136 |
2010 | 174458 | 180807 |
2011 | 173046 | 177335 |
2012 | 174161 | 180129 |
2013 | 176816 | 188544 |
2014 | 188265 | 203639 |
2015 | 202856 | 219582 |
2016 | 220361 | 231922 |
2017 | 231593 | 242628 |
2018 | 240428 | 248620 |
2019 | 243281 | 250410 |
2020 | 247355 | 247355 |
Sixth Task: What is the Maximum & Minimum No. of Crimes recorded per area ?
areas = housing_df["area"].unique()
crime_range = pd.DataFrame(columns = ["Area", "Min No Crime", "Max No Crime"])
crime_range["Area"] = areas
crime_range = crime_range.set_index("Area")
for a in areas:
that_area = housing_df[housing_df["area"] == a]
crime_range.loc[a,"Min No Crime"] = that_area["no_of_crimes"].min()
crime_range.loc[a,"Max No Crime"] = that_area["no_of_crimes"].max()
crime_range
Min No Crime | Max No Crime | |
---|---|---|
Area | ||
city of london | 0.0 | 10.0 |
barking and dagenham | 1217.0 | 2049.0 |
barnet | 1703.0 | 2893.0 |
bexley | 860.0 | 1914.0 |
brent | 1850.0 | 2937.0 |
bromley | 1441.0 | 2637.0 |
camden | 2079.0 | 4558.0 |
croydon | 2031.0 | 3263.0 |
ealing | 1871.0 | 3401.0 |
enfield | 1635.0 | 2798.0 |
tower hamlets | 1646.0 | 3316.0 |
greenwich | 1513.0 | 2853.0 |
hackney | 1870.0 | 3466.0 |
south east | NaN | NaN |
hammersmith and fulham | 1323.0 | 2645.0 |
haringey | 1536.0 | 3199.0 |
harrow | 937.0 | 1763.0 |
havering | 1130.0 | 1956.0 |
hillingdon | 1445.0 | 2819.0 |
hounslow | 1529.0 | 2817.0 |
islington | 1871.0 | 3384.0 |
kensington and chelsea | 1347.0 | 2778.0 |
kingston upon thames | 692.0 | 1379.0 |
lambeth | 2381.0 | 4701.0 |
lewisham | 1675.0 | 2813.0 |
merton | 819.0 | 1623.0 |
newham | 2130.0 | 3668.0 |
redbridge | 1487.0 | 2560.0 |
richmond upon thames | 700.0 | 1551.0 |
southwark | 2267.0 | 3821.0 |
sutton | 787.0 | 1425.0 |
waltham forest | 1575.0 | 2941.0 |
wandsworth | 1582.0 | 3051.0 |
westminster | 3504.0 | 7461.0 |
inner london | NaN | NaN |
outer london | NaN | NaN |
north east | NaN | NaN |
north west | NaN | NaN |
yorks and the humber | NaN | NaN |
east midlands | NaN | NaN |
west midlands | NaN | NaN |
east of england | NaN | NaN |
london | NaN | NaN |
south west | NaN | NaN |
england | NaN | NaN |
Seventh Task: Show the total count of records of each area, where average price is less than 100000.
housing_6 = housing_df[housing_df["average_price"] < 100000]
area_count = pd.DataFrame(columns = ["Area", "Count"])
area_count["Area"] = areas
area_count = area_count.set_index("Area")
for a in areas:
that_area = housing_6[housing_6["area"] == a]
area_count.loc[a, "Count"] = that_area.shape[0]
area_count.name = "Count of houses under 100000 for each area"
area_count
Count | |
---|---|
Area | |
city of london | 11 |
barking and dagenham | 85 |
barnet | 25 |
bexley | 64 |
brent | 40 |
bromley | 33 |
camden | 0 |
croydon | 57 |
ealing | 31 |
enfield | 54 |
tower hamlets | 47 |
greenwich | 59 |
hackney | 53 |
south east | 59 |
hammersmith and fulham | 0 |
haringey | 33 |
harrow | 30 |
havering | 60 |
hillingdon | 44 |
hounslow | 41 |
islington | 19 |
kensington and chelsea | 0 |
kingston upon thames | 30 |
lambeth | 41 |
lewisham | 62 |
merton | 35 |
newham | 72 |
redbridge | 52 |
richmond upon thames | 0 |
southwark | 48 |
sutton | 54 |
waltham forest | 64 |
wandsworth | 26 |
westminster | 0 |
inner london | 31 |
outer london | 46 |
north east | 112 |
north west | 111 |
yorks and the humber | 110 |
east midlands | 96 |
west midlands | 94 |
east of england | 76 |
london | 39 |
south west | 78 |
england | 87 |